import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import pandas
from sklearn import preprocessing
import sklearn.decomposition as sk_decomp

def sum_of_squares(x):
    """Return the sum of the squared elements of *x*.

    The square is taken element-wise with ``x * x`` and then reduced with
    ``np.sum``, so for a NumPy array every element contributes regardless
    of the array's dimensionality.
    """
    squared = x * x
    return np.sum(squared)


def norm(x):
    """Return the square root of the sum of squared elements of *x*.

    For a vector this is the Euclidean length; for a 2-D array it is the
    Frobenius norm, since all elements are squared and summed together.
    """
    total = sum_of_squares(x)
    return np.sqrt(total)


def corr(x, y, return_p=False):
    """Return the Pearson correlation coefficient between *x* and *y*.

    Parameters
    ----------
    x, y
        Inputs forwarded unchanged to ``scipy.stats.pearsonr``.
    return_p : bool, optional
        When true, also return the p-value reported by ``pearsonr``.

    Returns
    -------
    r or (r, p)
        The correlation coefficient alone, or the coefficient together
        with its p-value when ``return_p`` is true.
    """
    coefficient, p_value = stats.pearsonr(x, y)
    if not return_p:
        return coefficient
    return coefficient, p_value


def corr_matrix(t1, t2=None):
    """Return the Pearson correlations between the columns of two blocks.

    Parameters
    ----------
    t1 : array, rows are observations and columns are variables
    t2 : array, optional
        Second block with the same number of rows as ``t1``. When omitted,
        ``t1`` is correlated with itself.

    Returns
    -------
    array
        Entry ``[i, j]`` is the correlation between column ``i`` of ``t2``
        and column ``j`` of ``t1``, so for 2-D inputs the result has one
        row per column of ``t2`` and one column per column of ``t1``.
    """
    if t2 is None:
        t2 = t1
    return _corr_matrix(t1, t2)


def _corr_matrix(v1, v2):
    """Correlate every column of *v2* with every column of *v1*.

    Uses population statistics (``std`` with its default ``ddof=0`` and a
    divisor of ``n``). Columns with zero variance produce non-finite
    entries because of the division by their standard deviation.
    """
    n = v1.shape[0]
    # Centre the columns before forming the cross-product. The textbook
    # shortcut "sum(xy) - sum(x) * sum(y) / n" subtracts two nearly equal
    # large numbers when the column means dwarf the spread, which destroys
    # the result in floating point.
    v1_centered = v1 - v1.mean(0)
    v2_centered = v2 - v2.mean(0)
    stds = np.multiply.outer(v2.std(0), v1.std(0))
    return (v2_centered.T @ v1_centered) / stds / n


def per_component_r2(t, p, orig, verbose=False):
    """Compute the explained fraction of *orig* per component.

    Each component ``i`` contributes the rank-one reconstruction
    ``t[:, [i]] @ p[:, [i]].T``, which must have the same shape as
    ``orig``.

    Parameters
    ----------
    t : array
        One column per component; multiplied from the left.
    p : array
        One column per component; its transpose is multiplied from the
        right.
    orig : array
        The data the reconstruction is compared against.
    verbose : bool, optional
        When true, print the component count and the per-component values.

    Returns
    -------
    r2 : ndarray
        ``1 - SS(orig - component_i) / SS(orig)`` for each component alone.
    r2_sum : ndarray
        The same quantity for the cumulative reconstruction from
        components ``0..i``.
    """
    n_comps = t.shape[1]
    reconstruction = np.zeros(orig.shape)
    r2 = np.zeros(n_comps)
    r2_sum = np.zeros(n_comps)
    ss_orig = sum_of_squares(orig)

    if verbose:
        print(f'Number of components: {n_comps}')

    for i in range(n_comps):
        # Column-list indexing keeps both factors 2-D so the product is a
        # full matrix with the shape of ``orig``.
        comp_x = t[:, [i]] @ p[:, [i]].T
        reconstruction += comp_x
        r2[i] = 1 - (sum_of_squares(orig - comp_x) / ss_orig)
        r2_sum[i] = 1 - (sum_of_squares(orig - reconstruction) / ss_orig)
        # Progress output is opt-in, like the header line above.
        if verbose:
            print(f'Component {i+1}: R2: {r2[i]:.4} R2_sum: {r2_sum[i]:.4}')

    return r2, r2_sum


def compare_components(t1, p1, t2, p2, original_block=None):
    """Plot two sets of components side by side for visual comparison.

    Two figures are created and shown with ``plt.show()``:

    1. Bar charts of the per-column sum of squares of ``t1`` (top) and
       ``t2`` (bottom), sharing the x axis.
    2. Line plots of ``p1`` (top) and ``p2`` (bottom), sharing the x axis.

    Parameters
    ----------
    t1, t2 : array
        2-D arrays with one column per component.
    p1, p2 : array
        Data passed directly to ``Axes.plot``.
    original_block : optional
        Accepted for backward compatibility; currently not used.

    Returns
    -------
    None
    """
    fig, bar_axes = plt.subplots(2, sharex=True)
    # A figure-level title; ``plt.title`` would only label the axes that
    # was created last (the bottom panel).
    fig.suptitle("Component-wise explained variation")
    bar_axes[0].bar(range(t1.shape[1]), np.sum(t1 ** 2, axis=0))
    bar_axes[1].bar(range(t2.shape[1]), np.sum(t2 ** 2, axis=0))

    _, line_axes = plt.subplots(2, sharex=True)
    line_axes[0].plot(p1)
    line_axes[1].plot(p2)
    plt.show()


def create_dummy_variables(categorical, **kwargs):
    """Encode *categorical* as indicator (dummy) columns.

    Thin wrapper around ``pandas.get_dummies``; every keyword argument is
    forwarded to it unchanged and its result is returned as-is.
    """
    dummies = pandas.get_dummies(categorical, **kwargs)
    return dummies



def normal_scale(X: np.ndarray, return_scaler=False):
    """Standardise *X* with a default scikit-learn ``StandardScaler``.

    Parameters
    ----------
    X : np.ndarray
        Data the scaler is fitted on and then applied to.
    return_scaler : bool, optional
        When true, also return the fitted scaler.

    Returns
    -------
    scaled or (scaled, scaler)
        The transformed data, optionally paired with the fitted
        ``StandardScaler`` instance.
    """
    scaler = preprocessing.StandardScaler()
    scaler.fit(X)
    scaled = scaler.transform(X)

    if not return_scaler:
        return scaled
    return scaled, scaler


def center(x: np.ndarray, return_scaler=False, copy=False):
    """Mean-centre *x* using ``StandardScaler(with_std=False)``.

    Parameters
    ----------
    x : np.ndarray
        Data the scaler is fitted on and then applied to.
    return_scaler : bool, optional
        When true, also return the fitted scaler.
    copy : bool, optional
        When true, work on ``x.copy()`` instead of ``x`` itself.

    Returns
    -------
    centered or (centered, scaler)
        The transformed data, optionally paired with the fitted
        ``StandardScaler`` instance.
    """
    data = x.copy() if copy else x

    scaler = preprocessing.StandardScaler(with_std=False)
    scaler.fit(data)
    centered = scaler.transform(data)

    if not return_scaler:
        return centered
    return centered, scaler


def uv(x, return_scaler=False):
    """Scale *x* using ``StandardScaler(with_mean=False)``.

    The scaler is configured not to centre the data, so only the
    scaling step of ``StandardScaler`` is applied.

    Parameters
    ----------
    x
        Data the scaler is fitted on and then applied to.
    return_scaler : bool, optional
        When true, also return the fitted scaler.

    Returns
    -------
    scaled or (scaled, scaler)
        The transformed data, optionally paired with the fitted
        ``StandardScaler`` instance.
    """
    scaler = preprocessing.StandardScaler(with_mean=False)
    scaler.fit(x)
    scaled = scaler.transform(x)

    if not return_scaler:
        return scaled
    return scaled, scaler

def pca_svd(X, n_components):
    """Run scikit-learn PCA on *X* and return scores and loadings.

    Parameters
    ----------
    X
        Data passed to ``sklearn.decomposition.PCA``.
    n_components
        Forwarded to the ``PCA`` constructor.

    Returns
    -------
    T
        ``X`` projected onto the fitted components (``PCA.transform``).
    P
        The transpose of ``PCA.components_``, i.e. one column per
        component.
    """
    pca = sk_decomp.PCA(n_components=n_components)
    # Fit once and project once. Calling ``fit_transform`` here and then
    # ``transform`` again would compute the projection twice and throw the
    # first result away.
    pca.fit(X)
    scores = pca.transform(X)

    return scores, pca.components_.T
